Here, we will annotate the cells of the patient with id “365”.
library(Seurat)
library(SeuratWrappers)
library(harmony)
library(tidyverse)
# Paths
path_to_obj <- here::here("results/R_objects/5.seurat_clustered_365.rds")
path_to_obj_sampl_artifacts <- here::here("results/R_objects/cll_seurat_annotated.rds")
path_to_save <- here::here("results/R_objects/6.seurat_annotated_365.rds")
path_to_save_markers <- here::here("3-clustering_and_annotation/markers_clusters_365.rds")
# Colors
color_palette <- c("black", "gray", "red", "yellow", "violet", "green4",
"blue", "mediumorchid2", "coral2", "blueviolet",
"indianred4", "deepskyblue1", "dimgray", "deeppink1",
"green", "lightgray", "hotpink1")
# Source functions
source(here::here("bin/utils.R"))
# Thresholds
min_log2FC <- 0.3
alpha <- 0.001
seurat <- readRDS(path_to_obj)
seurat
## An object of class Seurat
## 23326 features across 4685 samples within 1 assay
## Active assay: RNA (23326 features, 2000 variable features)
## 3 dimensional reductions calculated: pca, harmony, umap
DimPlot(seurat, cols = color_palette)
seurat_bias <- readRDS(path_to_obj_sampl_artifacts)
As upregulation of cell cycle genes is a hallmark of Richter transformation, we will infer the cell cycle score and phase for each cell:
DimPlot(seurat, group.by = "time_point")
FeaturePlot(seurat, "CD3D")
seurat <- CellCycleScoring(
seurat,
s.features = cc.genes.updated.2019$s.genes,
g2m.features = cc.genes.updated.2019$g2m.genes,
set.ident = FALSE
)
DimPlot(seurat, group.by = "Phase")
umap_s_score <- FeaturePlot(seurat, features = "S.Score") +
scale_color_viridis_c(option = "magma") +
labs(title = "S Score") +
theme(
plot.title = element_text(hjust = 0.5, size = 12, face = "plain"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank()
)
umap_g2m_score <- FeaturePlot(seurat, features = "G2M.Score") +
scale_color_viridis_c(option = "magma") +
labs(title = "G2M Score") +
theme(
plot.title = element_text(hjust = 0.5, size = 12, face = "plain"),
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank()
)
umap_cc_combined <- ggpubr::ggarrange(
plotlist = list(umap_s_score, umap_g2m_score),
nrow = 2,
ncol = 1,
common.legend = FALSE
)
umap_cc_combined
markers <- FindAllMarkers(seurat, only.pos = TRUE, logfc.threshold = min_log2FC)
markers <- markers %>%
mutate(cluster = as.character(cluster)) %>%
filter(p_val_adj < alpha) %>%
arrange(cluster) %>%
group_by(cluster) %>%
arrange(desc(avg_log2FC), .by_group = TRUE)
DT::datatable(markers)
Important literature to annotate the cells:
| Cluster | Markers | Annotation |
|---|---|---|
| 0 | CXCR4 | CLL-like |
| 1_0 | CD27 | CXCR4loCD27hi |
| 1_1 | MIR155HG | MIR155HGhi |
| 1_2 | ENO1, TCL1A | RT-like quiescent |
| 1_3 | PCNA, TOP2A | RT-like proliferative |
Annotate:
seurat$annotation_final <- factor(
seurat$final_clusters,
levels = c("0", "1_0", "1_1", "1_2", "1_3")
)
new_levels_365 <- c("CLL-like", "CXCR4loCD27hi", "MIR155HGhi",
"RT-like quiescent", "RT-like proliferative")
levels(seurat$annotation_final) <- new_levels_365
reordered_levels_365 <- new_levels_365
seurat$annotation_final <- factor(seurat$annotation_final, reordered_levels_365)
Idents(seurat) <- "annotation_final"
# Plot UMAP
cols <- c("#cfc9c9", "#808080", "#333333", "#d65c7a", "#813151")
names(cols) <- levels(seurat$annotation_final)
umap_annotation <- DimPlot(seurat, pt.size = 0.5)
col_labels <- c(
"CLL-like" = bquote("CLL-like"),
"CXCR4loCD27hi" = bquote(CXCR4^lo~CD27^hi),
"MIR155HGhi" = bquote(MIR155HG^hi),
"RT-like quiescent" = bquote("RT-like quiescent"),
"RT-like proliferative" = bquote("RT-like proliferative")
)
umap_annotation <- umap_annotation +
scale_color_manual(values = cols, breaks = names(cols), labels = col_labels) +
theme(
axis.title = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
axis.line = element_blank()
)
umap_annotation
# UMAPs
genes_interest <- c("CXCR4", "CD27", "MIR155HG", "ENO1", "TCL1A",
"PCNA", "TOP2A")
feature_plots <- purrr::map(genes_interest, function(x) {
p <- FeaturePlot(seurat, x, pt.size = 0.5) +
scale_color_viridis_c(option = "magma")
p
})
# Dot plots
dot_plot <- DotPlot(seurat, features = rev(genes_interest)) +
coord_flip() +
scale_color_viridis_c(option = "magma") +
scale_y_discrete(breaks = names(col_labels), labels = col_labels) +
theme(
axis.title = element_blank(),
axis.text.x = element_text(angle = 45, vjust = 1, hjust=1),
legend.title = element_text(size = 12)
)
dot_plot
# Violin plots
vln_plot_s <- seurat@meta.data %>%
ggplot(aes(annotation_final, S.Score)) +
geom_violin(fill = "gray") +
labs(x = "", y = "S Phase Score") +
scale_x_discrete(breaks = names(col_labels), labels = col_labels) +
theme_bw() +
theme(axis.text.x = element_text(color = "black", angle = 45, vjust = 1, hjust = 1, size = 11))
vln_plot_s
vln_plot_g2m <- seurat@meta.data %>%
ggplot(aes(annotation_final, G2M.Score)) +
geom_violin(fill = "gray") +
labs(x = "", y = "G2M Phase Score") +
scale_x_discrete(breaks = names(col_labels), labels = col_labels) +
theme_bw() +
theme(axis.text.x = element_text(color = "black", angle = 45, vjust = 1, hjust = 1, size = 11))
vln_plot_g2m
Although we could recover the major clusters that we have observed in previous patients, it is intriguing that the clusters of MIR155HGhi and CXCR4loCD27hi were found in the RT time-point and not in the CLL. Thus, let us perform a CLL-specific clustering:
seurat_cll <- subset(seurat, time_point == "T2")
seurat_cll <- process_seurat(seurat_cll, dims = 1:20)
feature_plots2 <- purrr::map(c("CXCR4", "CD27", "MIR155HG", "S100A4"), function(x) {
p <- FeaturePlot(seurat_cll, x, pt.size = 0.5) +
scale_color_viridis_c(option = "magma")
p
})
feature_plots2
## [[1]]
##
## [[2]]
##
## [[3]]
##
## [[4]]
Indeed, we see that CXCR4, CD27 and MIR155HG are mutually exclusive, but we cannot distinguish 3 clear clusters
seurat_cll <- FindNeighbors(seurat_cll, dims = 1:20, reduction = "pca")
seurat_cll <- FindClusters(seurat_cll, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 4119
## Number of edges: 134238
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.6852
## Number of communities: 4
## Elapsed time: 0 seconds
DimPlot(seurat_cll)
Could this pattern be explained by a differential processing time between both samples (RT and CLL)?
seurat_bias <- subset(seurat_bias, temperature == "RT")
seurat_bias <- subset(seurat_bias, time == "24h")
DimPlot(seurat_bias)
seurat_bias <- SplitObject(seurat_bias, "cell_type")
seurat_bias <- seurat_bias[c("CLL 1472", "CLL 1892")]
seurat_bias <- purrr::map(seurat_bias, process_seurat, dims = 1:20)
DimPlot(seurat_bias$`CLL 1472`, group.by = "time")
FeaturePlot(seurat_bias$`CLL 1472`, c("CXCR4", "CD27", "MIR155HG"))
DimPlot(seurat_bias$`CLL 1892`, group.by = "time")
FeaturePlot(seurat_bias$`CLL 1892`, c("CXCR4", "CD27", "MIR155HG"))
# Save Seurat object
saveRDS(seurat, path_to_save)
# Save markers
markers$annotation <- factor(markers$cluster)
levels(markers$annotation) <- new_levels_365
markers_list <- purrr::map(levels(markers$annotation), function(x) {
df <- markers[markers$annotation == x, ]
df <- df[, c(7, 1, 5, 2:4, 6, 8)]
df
})
names(markers_list) <- levels(markers$annotation)
markers_list <- markers_list[reordered_levels_365]
markers_final <- bind_rows(markers_list)
saveRDS(markers_list, path_to_save_markers)
saveRDS(
markers_final,
here::here("results/tables/markers/markers_annotated_clusters_patient_365.rds")
)
saveRDS(markers_list, path_to_save_markers)
openxlsx::write.xlsx(
x = markers_list,
file = "results/tables/markers/markers_annotated_clusters_patient_365.xlsx"
)
sessionInfo()
## R version 4.0.4 (2021-02-15)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 20.04.2 LTS
##
## Matrix products: default
## BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.9.0
## LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.9.0
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=es_ES.UTF-8 LC_COLLATE=en_US.UTF-8 LC_MONETARY=es_ES.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=es_ES.UTF-8 LC_NAME=C LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=es_ES.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] forcats_0.5.1 stringr_1.4.0 dplyr_1.0.6 purrr_0.3.4 readr_1.4.0 tidyr_1.1.3 tibble_3.1.2 ggplot2_3.3.3 tidyverse_1.3.1 harmony_1.0 Rcpp_1.0.6 SeuratWrappers_0.3.0 SeuratObject_4.0.2 Seurat_4.0.3 BiocStyle_2.18.1
##
## loaded via a namespace (and not attached):
## [1] readxl_1.3.1 backports_1.2.1 plyr_1.8.6 igraph_1.2.6 lazyeval_0.2.2 splines_4.0.4 crosstalk_1.1.1 listenv_0.8.0 scattermore_0.7 digest_0.6.27 htmltools_0.5.1.1 fansi_0.5.0 magrittr_2.0.1 tensor_1.5 cluster_2.1.1 ROCR_1.0-11 limma_3.46.0 openxlsx_4.2.3 remotes_2.4.0 globals_0.14.0 modelr_0.1.8 matrixStats_0.59.0 spatstat.sparse_2.0-0 colorspace_2.0-1 rvest_1.0.0 ggrepel_0.9.1 haven_2.4.1 xfun_0.23 crayon_1.4.1 jsonlite_1.7.2 spatstat.data_2.1-0 survival_3.2-10 zoo_1.8-9 glue_1.4.2 polyclip_1.10-0 gtable_0.3.0 leiden_0.3.8 car_3.0-10 future.apply_1.7.0 abind_1.4-5 scales_1.1.1 DBI_1.1.1 rstatix_0.7.0 miniUI_0.1.1.1 viridisLite_0.4.0 xtable_1.8-4 reticulate_1.20 spatstat.core_2.1-2 foreign_0.8-81 rsvd_1.0.5 DT_0.18 htmlwidgets_1.5.3 httr_1.4.2 RColorBrewer_1.1-2
## [55] ellipsis_0.3.2 ica_1.0-2 farver_2.1.0 pkgconfig_2.0.3 sass_0.4.0 uwot_0.1.10 dbplyr_2.1.1 deldir_0.2-10 here_1.0.1 utf8_1.2.1 labeling_0.4.2 tidyselect_1.1.1 rlang_0.4.11 reshape2_1.4.4 later_1.2.0 munsell_0.5.0 cellranger_1.1.0 tools_4.0.4 cli_2.5.0 generics_0.1.0 broom_0.7.7 ggridges_0.5.3 evaluate_0.14 fastmap_1.1.0 yaml_2.2.1 goftest_1.2-2 knitr_1.33 fs_1.5.0 fitdistrplus_1.1-5 zip_2.2.0 RANN_2.6.1 pbapply_1.4-3 future_1.21.0 nlme_3.1-152 mime_0.10 xml2_1.3.2 compiler_4.0.4 rstudioapi_0.13 curl_4.3.1 plotly_4.9.4 png_0.1-7 ggsignif_0.6.2 spatstat.utils_2.2-0 reprex_2.0.0 bslib_0.2.5.1 stringi_1.6.2 highr_0.9 RSpectra_0.16-0 lattice_0.20-41 Matrix_1.3-4 vctrs_0.3.8 pillar_1.6.1 lifecycle_1.0.0 BiocManager_1.30.15
## [109] spatstat.geom_2.1-0 lmtest_0.9-38 jquerylib_0.1.4 RcppAnnoy_0.0.18 data.table_1.14.0 cowplot_1.1.1 irlba_2.3.3 httpuv_1.6.1 patchwork_1.1.1 R6_2.5.0 bookdown_0.22 promises_1.2.0.1 rio_0.5.26 KernSmooth_2.23-18 gridExtra_2.3 parallelly_1.26.0 codetools_0.2-18 MASS_7.3-53.1 assertthat_0.2.1 rprojroot_2.0.2 withr_2.4.2 sctransform_0.3.2 mgcv_1.8-36 parallel_4.0.4 hms_1.1.0 grid_4.0.4 rpart_4.1-15 rmarkdown_2.8 carData_3.0-4 Rtsne_0.15 ggpubr_0.4.0 shiny_1.6.0 lubridate_1.7.10